Load scripts: loads libraries and useful scripts used in the analyses; all .R files contained in scripts at the root of the factory are automatically loaded
Load data: imports datasets, and may contain some ad hoc changes to the data such as specific data cleaning (not used in other reports), new variables used in the analyses, etc.
library(reportfactory)
library(here)
library(rio)
library(tidyverse)
library(incidence)
library(distcrete)
library(epitrix)
library(earlyR)
library(projections)
library(linelist)
library(remotes)
library(janitor)
library(kableExtra)
library(DT)
library(cyphr)
library(chngpt)
library(lubridate)
library(ggpubr)
library(ggnewscale)

These scripts will load:

- .R files inside /scripts/
- .R files inside /src/

These scripts also contain routines to access the latest clean encrypted data (see next section).
We import the latest NHS pathways data:
## Import the NHS Pathways data through the helper `import_pathways()`
## (not defined in this section) and convert the result to a tibble
x <- import_pathways() %>%
  as_tibble()
## Display the imported data
x
## # A tibble: 114,939 x 9
## site_type date sex age ccg_code ccg_name count postcode nhs_region
## <chr> <date> <chr> <chr> <chr> <chr> <int> <chr> <chr>
## 1 111 2020-03-18 female 0-18 e380000… nhs_bar… 35 rm13ae london
## 2 111 2020-03-18 female 0-18 e380000… nhs_bed… 27 mk454hr east_of_e…
## 3 111 2020-03-18 female 0-18 e380000… nhs_bla… 9 bb12fd north_west
## 4 111 2020-03-18 female 0-18 e380000… nhs_bro… 11 br33ql london
## 5 111 2020-03-18 female 0-18 e380000… nhs_can… 9 ws111jp midlands
## 6 111 2020-03-18 female 0-18 e380000… nhs_cit… 12 n15lz london
## 7 111 2020-03-18 female 0-18 e380000… nhs_enf… 7 en40dy london
## 8 111 2020-03-18 female 0-18 e380000… nhs_ham… 6 dl62uu north_eas…
## 9 111 2020-03-18 female 0-18 e380000… nhs_har… 24 ts232la north_eas…
## 10 111 2020-03-18 female 0-18 e380000… nhs_kin… 6 kt11eu london
## # … with 114,929 more rows

We also import demographics data for NHS regions in England, used later in our analysis:
## Location of the regional population file, relative to the project root
path <- here::here("data", "csv", "nhs_region_population_2018.csv")

## Read the file and turn region codes into display labels:
## underscores become spaces, words are capitalised, and the link words
## "of" / "and" are put back in lower case
nhs_region_pop <- rio::import(path) %>%
  mutate(
    nhs_region = gsub("_", " ", nhs_region),
    nhs_region = str_to_title(nhs_region),
    nhs_region = gsub(" Of ", " of ", nhs_region),
    nhs_region = gsub(" And ", " and ", nhs_region)
  )

nhs_region_pop
## nhs_region variable value
## 1 North West 0-18 0.22538599
## 2 North East and Yorkshire 0-18 0.21876449
## 3 Midlands 0-18 0.22564656
## 4 East of England 0-18 0.22810783
## 5 London 0-18 0.23764782
## 6 South East 0-18 0.22458811
## 7 South West 0-18 0.20799797
## 8 North West 19-69 0.64274078
## 9 North East and Yorkshire 19-69 0.64437753
## 10 Midlands 19-69 0.63876675
## 11 East of England 19-69 0.63034229
## 12 London 19-69 0.67820084
## 13 South East 19-69 0.63267336
## 14 South West 19-69 0.63176131
## 15 North West 70-120 0.13187323
## 16 North East and Yorkshire 70-120 0.13685797
## 17 Midlands 70-120 0.13558669
## 18 East of England 70-120 0.14154988
## 19 London 70-120 0.08415135
## 20 South East 70-120 0.14273853
## 21 South West 70-120 0.16024072

Finally, we import publicly available deaths per NHS region:
## truncation to account for reporting delay
delay_max <- 21

## Deaths per NHS region, obtained from the helper `import_deaths()`
## (not defined in this section). Region codes are turned into display
## labels: underscores become spaces, words are capitalised, and the link
## words "of" / "and" are put back in lower case
dth <- import_deaths() %>%
  mutate(
    nhs_region = gsub("_", " ", nhs_region),
    nhs_region = str_to_title(nhs_region),
    nhs_region = gsub(" Of ", " of ", nhs_region),
    nhs_region = gsub(" And ", " and ", nhs_region)
  )

dth
## date_report nhs_region deaths
## 1 2020-03-01 East of England 0
## 2 2020-03-02 East of England 1
## 3 2020-03-03 East of England 0
## 4 2020-03-04 East of England 0
## 5 2020-03-05 East of England 0
## 6 2020-03-06 East of England 1
## 7 2020-03-07 East of England 0
## 8 2020-03-08 East of England 0
## 9 2020-03-09 East of England 1
## 10 2020-03-10 East of England 0
## 11 2020-03-11 East of England 0
## 12 2020-03-12 East of England 0
## 13 2020-03-13 East of England 1
## 14 2020-03-14 East of England 2
## 15 2020-03-15 East of England 2
## 16 2020-03-16 East of England 1
## 17 2020-03-17 East of England 1
## 18 2020-03-18 East of England 5
## 19 2020-03-19 East of England 4
## 20 2020-03-20 East of England 2
## 21 2020-03-21 East of England 11
## 22 2020-03-22 East of England 11
## 23 2020-03-23 East of England 11
## 24 2020-03-24 East of England 19
## 25 2020-03-25 East of England 24
## 26 2020-03-26 East of England 37
## 27 2020-03-27 East of England 38
## 28 2020-03-28 East of England 27
## 29 2020-03-29 East of England 55
## 30 2020-03-30 East of England 73
## 31 2020-03-31 East of England 36
## 32 2020-04-01 East of England 60
## 33 2020-04-02 East of England 67
## 34 2020-04-03 East of England 73
## 35 2020-04-04 East of England 71
## 36 2020-04-05 East of England 75
## 37 2020-04-06 East of England 71
## 38 2020-04-07 East of England 92
## 39 2020-04-08 East of England 111
## 40 2020-04-09 East of England 87
## 41 2020-04-10 East of England 74
## 42 2020-04-11 East of England 91
## 43 2020-04-12 East of England 101
## 44 2020-04-13 East of England 77
## 45 2020-04-14 East of England 61
## 46 2020-04-15 East of England 82
## 47 2020-04-16 East of England 74
## 48 2020-04-17 East of England 86
## 49 2020-04-18 East of England 63
## 50 2020-04-19 East of England 66
## 51 2020-04-20 East of England 66
## 52 2020-04-21 East of England 74
## 53 2020-04-22 East of England 66
## 54 2020-04-23 East of England 49
## 55 2020-04-24 East of England 64
## 56 2020-04-25 East of England 54
## 57 2020-04-26 East of England 49
## 58 2020-04-27 East of England 45
## 59 2020-04-28 East of England 57
## 60 2020-04-29 East of England 32
## 61 2020-04-30 East of England 43
## 62 2020-05-01 East of England 48
## 63 2020-05-02 East of England 29
## 64 2020-05-03 East of England 41
## 65 2020-05-04 East of England 18
## 66 2020-05-05 East of England 35
## 67 2020-05-06 East of England 28
## 68 2020-05-07 East of England 33
## 69 2020-05-08 East of England 30
## 70 2020-05-09 East of England 23
## 71 2020-05-10 East of England 21
## 72 2020-05-11 East of England 18
## 73 2020-05-12 East of England 19
## 74 2020-05-13 East of England 25
## 75 2020-05-14 East of England 24
## 76 2020-05-15 East of England 13
## 77 2020-05-16 East of England 18
## 78 2020-05-17 East of England 11
## 79 2020-05-18 East of England 8
## 80 2020-05-19 East of England 5
## 81 2020-03-01 London 0
## 82 2020-03-02 London 0
## 83 2020-03-03 London 0
## 84 2020-03-04 London 0
## 85 2020-03-05 London 0
## 86 2020-03-06 London 1
## 87 2020-03-07 London 1
## 88 2020-03-08 London 0
## 89 2020-03-09 London 1
## 90 2020-03-10 London 0
## 91 2020-03-11 London 7
## 92 2020-03-12 London 6
## 93 2020-03-13 London 10
## 94 2020-03-14 London 14
## 95 2020-03-15 London 10
## 96 2020-03-16 London 17
## 97 2020-03-17 London 25
## 98 2020-03-18 London 31
## 99 2020-03-19 London 25
## 100 2020-03-20 London 45
## 101 2020-03-21 London 49
## 102 2020-03-22 London 54
## 103 2020-03-23 London 62
## 104 2020-03-24 London 86
## 105 2020-03-25 London 106
## 106 2020-03-26 London 130
## 107 2020-03-27 London 135
## 108 2020-03-28 London 127
## 109 2020-03-29 London 189
## 110 2020-03-30 London 180
## 111 2020-03-31 London 139
## 112 2020-04-01 London 188
## 113 2020-04-02 London 176
## 114 2020-04-03 London 188
## 115 2020-04-04 London 229
## 116 2020-04-05 London 194
## 117 2020-04-06 London 198
## 118 2020-04-07 London 216
## 119 2020-04-08 London 236
## 120 2020-04-09 London 200
## 121 2020-04-10 London 168
## 122 2020-04-11 London 175
## 123 2020-04-12 London 156
## 124 2020-04-13 London 165
## 125 2020-04-14 London 142
## 126 2020-04-15 London 142
## 127 2020-04-16 London 138
## 128 2020-04-17 London 99
## 129 2020-04-18 London 101
## 130 2020-04-19 London 102
## 131 2020-04-20 London 94
## 132 2020-04-21 London 93
## 133 2020-04-22 London 108
## 134 2020-04-23 London 77
## 135 2020-04-24 London 71
## 136 2020-04-25 London 57
## 137 2020-04-26 London 53
## 138 2020-04-27 London 51
## 139 2020-04-28 London 43
## 140 2020-04-29 London 43
## 141 2020-04-30 London 39
## 142 2020-05-01 London 40
## 143 2020-05-02 London 40
## 144 2020-05-03 London 35
## 145 2020-05-04 London 29
## 146 2020-05-05 London 25
## 147 2020-05-06 London 34
## 148 2020-05-07 London 35
## 149 2020-05-08 London 29
## 150 2020-05-09 London 22
## 151 2020-05-10 London 25
## 152 2020-05-11 London 16
## 153 2020-05-12 London 16
## 154 2020-05-13 London 16
## 155 2020-05-14 London 20
## 156 2020-05-15 London 14
## 157 2020-05-16 London 11
## 158 2020-05-17 London 15
## 159 2020-05-18 London 5
## 160 2020-05-19 London 0
## 161 2020-03-01 Midlands 0
## 162 2020-03-02 Midlands 0
## 163 2020-03-03 Midlands 1
## 164 2020-03-04 Midlands 0
## 165 2020-03-05 Midlands 0
## 166 2020-03-06 Midlands 0
## 167 2020-03-07 Midlands 0
## 168 2020-03-08 Midlands 3
## 169 2020-03-09 Midlands 1
## 170 2020-03-10 Midlands 0
## 171 2020-03-11 Midlands 2
## 172 2020-03-12 Midlands 6
## 173 2020-03-13 Midlands 5
## 174 2020-03-14 Midlands 4
## 175 2020-03-15 Midlands 5
## 176 2020-03-16 Midlands 11
## 177 2020-03-17 Midlands 8
## 178 2020-03-18 Midlands 13
## 179 2020-03-19 Midlands 8
## 180 2020-03-20 Midlands 28
## 181 2020-03-21 Midlands 13
## 182 2020-03-22 Midlands 31
## 183 2020-03-23 Midlands 33
## 184 2020-03-24 Midlands 41
## 185 2020-03-25 Midlands 46
## 186 2020-03-26 Midlands 62
## 187 2020-03-27 Midlands 76
## 188 2020-03-28 Midlands 84
## 189 2020-03-29 Midlands 140
## 190 2020-03-30 Midlands 120
## 191 2020-03-31 Midlands 67
## 192 2020-04-01 Midlands 129
## 193 2020-04-02 Midlands 138
## 194 2020-04-03 Midlands 123
## 195 2020-04-04 Midlands 149
## 196 2020-04-05 Midlands 164
## 197 2020-04-06 Midlands 140
## 198 2020-04-07 Midlands 123
## 199 2020-04-08 Midlands 185
## 200 2020-04-09 Midlands 138
## 201 2020-04-10 Midlands 127
## 202 2020-04-11 Midlands 142
## 203 2020-04-12 Midlands 138
## 204 2020-04-13 Midlands 120
## 205 2020-04-14 Midlands 116
## 206 2020-04-15 Midlands 147
## 207 2020-04-16 Midlands 101
## 208 2020-04-17 Midlands 118
## 209 2020-04-18 Midlands 115
## 210 2020-04-19 Midlands 91
## 211 2020-04-20 Midlands 107
## 212 2020-04-21 Midlands 86
## 213 2020-04-22 Midlands 77
## 214 2020-04-23 Midlands 100
## 215 2020-04-24 Midlands 76
## 216 2020-04-25 Midlands 72
## 217 2020-04-26 Midlands 77
## 218 2020-04-27 Midlands 74
## 219 2020-04-28 Midlands 67
## 220 2020-04-29 Midlands 53
## 221 2020-04-30 Midlands 53
## 222 2020-05-01 Midlands 63
## 223 2020-05-02 Midlands 51
## 224 2020-05-03 Midlands 50
## 225 2020-05-04 Midlands 58
## 226 2020-05-05 Midlands 56
## 227 2020-05-06 Midlands 55
## 228 2020-05-07 Midlands 48
## 229 2020-05-08 Midlands 34
## 230 2020-05-09 Midlands 37
## 231 2020-05-10 Midlands 40
## 232 2020-05-11 Midlands 31
## 233 2020-05-12 Midlands 43
## 234 2020-05-13 Midlands 37
## 235 2020-05-14 Midlands 31
## 236 2020-05-15 Midlands 33
## 237 2020-05-16 Midlands 30
## 238 2020-05-17 Midlands 28
## 239 2020-05-18 Midlands 23
## 240 2020-05-19 Midlands 8
## 241 2020-03-01 North East and Yorkshire 0
## 242 2020-03-02 North East and Yorkshire 0
## 243 2020-03-03 North East and Yorkshire 0
## 244 2020-03-04 North East and Yorkshire 0
## 245 2020-03-05 North East and Yorkshire 0
## 246 2020-03-06 North East and Yorkshire 0
## 247 2020-03-07 North East and Yorkshire 0
## 248 2020-03-08 North East and Yorkshire 0
## 249 2020-03-09 North East and Yorkshire 0
## 250 2020-03-10 North East and Yorkshire 0
## 251 2020-03-11 North East and Yorkshire 0
## 252 2020-03-12 North East and Yorkshire 0
## 253 2020-03-13 North East and Yorkshire 0
## 254 2020-03-14 North East and Yorkshire 0
## 255 2020-03-15 North East and Yorkshire 2
## 256 2020-03-16 North East and Yorkshire 3
## 257 2020-03-17 North East and Yorkshire 1
## 258 2020-03-18 North East and Yorkshire 2
## 259 2020-03-19 North East and Yorkshire 6
## 260 2020-03-20 North East and Yorkshire 5
## 261 2020-03-21 North East and Yorkshire 6
## 262 2020-03-22 North East and Yorkshire 7
## 263 2020-03-23 North East and Yorkshire 9
## 264 2020-03-24 North East and Yorkshire 7
## 265 2020-03-25 North East and Yorkshire 18
## 266 2020-03-26 North East and Yorkshire 19
## 267 2020-03-27 North East and Yorkshire 28
## 268 2020-03-28 North East and Yorkshire 35
## 269 2020-03-29 North East and Yorkshire 65
## 270 2020-03-30 North East and Yorkshire 69
## 271 2020-03-31 North East and Yorkshire 41
## 272 2020-04-01 North East and Yorkshire 61
## 273 2020-04-02 North East and Yorkshire 74
## 274 2020-04-03 North East and Yorkshire 96
## 275 2020-04-04 North East and Yorkshire 104
## 276 2020-04-05 North East and Yorkshire 92
## 277 2020-04-06 North East and Yorkshire 95
## 278 2020-04-07 North East and Yorkshire 102
## 279 2020-04-08 North East and Yorkshire 105
## 280 2020-04-09 North East and Yorkshire 111
## 281 2020-04-10 North East and Yorkshire 117
## 282 2020-04-11 North East and Yorkshire 98
## 283 2020-04-12 North East and Yorkshire 84
## 284 2020-04-13 North East and Yorkshire 94
## 285 2020-04-14 North East and Yorkshire 107
## 286 2020-04-15 North East and Yorkshire 95
## 287 2020-04-16 North East and Yorkshire 103
## 288 2020-04-17 North East and Yorkshire 86
## 289 2020-04-18 North East and Yorkshire 95
## 290 2020-04-19 North East and Yorkshire 87
## 291 2020-04-20 North East and Yorkshire 100
## 292 2020-04-21 North East and Yorkshire 76
## 293 2020-04-22 North East and Yorkshire 83
## 294 2020-04-23 North East and Yorkshire 62
## 295 2020-04-24 North East and Yorkshire 72
## 296 2020-04-25 North East and Yorkshire 68
## 297 2020-04-26 North East and Yorkshire 63
## 298 2020-04-27 North East and Yorkshire 65
## 299 2020-04-28 North East and Yorkshire 57
## 300 2020-04-29 North East and Yorkshire 69
## 301 2020-04-30 North East and Yorkshire 56
## 302 2020-05-01 North East and Yorkshire 64
## 303 2020-05-02 North East and Yorkshire 47
## 304 2020-05-03 North East and Yorkshire 39
## 305 2020-05-04 North East and Yorkshire 48
## 306 2020-05-05 North East and Yorkshire 39
## 307 2020-05-06 North East and Yorkshire 50
## 308 2020-05-07 North East and Yorkshire 40
## 309 2020-05-08 North East and Yorkshire 38
## 310 2020-05-09 North East and Yorkshire 43
## 311 2020-05-10 North East and Yorkshire 39
## 312 2020-05-11 North East and Yorkshire 28
## 313 2020-05-12 North East and Yorkshire 25
## 314 2020-05-13 North East and Yorkshire 24
## 315 2020-05-14 North East and Yorkshire 27
## 316 2020-05-15 North East and Yorkshire 29
## 317 2020-05-16 North East and Yorkshire 35
## 318 2020-05-17 North East and Yorkshire 22
## 319 2020-05-18 North East and Yorkshire 21
## 320 2020-05-19 North East and Yorkshire 12
## 321 2020-03-01 North West 0
## 322 2020-03-02 North West 0
## 323 2020-03-03 North West 0
## 324 2020-03-04 North West 0
## 325 2020-03-05 North West 1
## 326 2020-03-06 North West 0
## 327 2020-03-07 North West 0
## 328 2020-03-08 North West 1
## 329 2020-03-09 North West 0
## 330 2020-03-10 North West 0
## 331 2020-03-11 North West 0
## 332 2020-03-12 North West 2
## 333 2020-03-13 North West 2
## 334 2020-03-14 North West 1
## 335 2020-03-15 North West 4
## 336 2020-03-16 North West 2
## 337 2020-03-17 North West 4
## 338 2020-03-18 North West 6
## 339 2020-03-19 North West 6
## 340 2020-03-20 North West 10
## 341 2020-03-21 North West 11
## 342 2020-03-22 North West 13
## 343 2020-03-23 North West 15
## 344 2020-03-24 North West 21
## 345 2020-03-25 North West 19
## 346 2020-03-26 North West 31
## 347 2020-03-27 North West 31
## 348 2020-03-28 North West 30
## 349 2020-03-29 North West 67
## 350 2020-03-30 North West 69
## 351 2020-03-31 North West 36
## 352 2020-04-01 North West 85
## 353 2020-04-02 North West 94
## 354 2020-04-03 North West 88
## 355 2020-04-04 North West 98
## 356 2020-04-05 North West 102
## 357 2020-04-06 North West 100
## 358 2020-04-07 North West 133
## 359 2020-04-08 North West 123
## 360 2020-04-09 North West 117
## 361 2020-04-10 North West 114
## 362 2020-04-11 North West 135
## 363 2020-04-12 North West 125
## 364 2020-04-13 North West 124
## 365 2020-04-14 North West 129
## 366 2020-04-15 North West 114
## 367 2020-04-16 North West 133
## 368 2020-04-17 North West 96
## 369 2020-04-18 North West 112
## 370 2020-04-19 North West 70
## 371 2020-04-20 North West 79
## 372 2020-04-21 North West 75
## 373 2020-04-22 North West 77
## 374 2020-04-23 North West 84
## 375 2020-04-24 North West 64
## 376 2020-04-25 North West 65
## 377 2020-04-26 North West 54
## 378 2020-04-27 North West 54
## 379 2020-04-28 North West 56
## 380 2020-04-29 North West 62
## 381 2020-04-30 North West 57
## 382 2020-05-01 North West 43
## 383 2020-05-02 North West 55
## 384 2020-05-03 North West 54
## 385 2020-05-04 North West 44
## 386 2020-05-05 North West 46
## 387 2020-05-06 North West 41
## 388 2020-05-07 North West 44
## 389 2020-05-08 North West 40
## 390 2020-05-09 North West 28
## 391 2020-05-10 North West 38
## 392 2020-05-11 North West 32
## 393 2020-05-12 North West 35
## 394 2020-05-13 North West 24
## 395 2020-05-14 North West 25
## 396 2020-05-15 North West 31
## 397 2020-05-16 North West 27
## 398 2020-05-17 North West 16
## 399 2020-05-18 North West 19
## 400 2020-05-19 North West 4
## 401 2020-03-01 South East 0
## 402 2020-03-02 South East 0
## 403 2020-03-03 South East 1
## 404 2020-03-04 South East 0
## 405 2020-03-05 South East 1
## 406 2020-03-06 South East 0
## 407 2020-03-07 South East 0
## 408 2020-03-08 South East 1
## 409 2020-03-09 South East 1
## 410 2020-03-10 South East 1
## 411 2020-03-11 South East 1
## 412 2020-03-12 South East 0
## 413 2020-03-13 South East 1
## 414 2020-03-14 South East 1
## 415 2020-03-15 South East 5
## 416 2020-03-16 South East 8
## 417 2020-03-17 South East 7
## 418 2020-03-18 South East 10
## 419 2020-03-19 South East 9
## 420 2020-03-20 South East 13
## 421 2020-03-21 South East 7
## 422 2020-03-22 South East 25
## 423 2020-03-23 South East 20
## 424 2020-03-24 South East 22
## 425 2020-03-25 South East 27
## 426 2020-03-26 South East 34
## 427 2020-03-27 South East 39
## 428 2020-03-28 South East 32
## 429 2020-03-29 South East 74
## 430 2020-03-30 South East 68
## 431 2020-03-31 South East 38
## 432 2020-04-01 South East 66
## 433 2020-04-02 South East 56
## 434 2020-04-03 South East 68
## 435 2020-04-04 South East 80
## 436 2020-04-05 South East 80
## 437 2020-04-06 South East 87
## 438 2020-04-07 South East 99
## 439 2020-04-08 South East 82
## 440 2020-04-09 South East 104
## 441 2020-04-10 South East 88
## 442 2020-04-11 South East 87
## 443 2020-04-12 South East 88
## 444 2020-04-13 South East 83
## 445 2020-04-14 South East 64
## 446 2020-04-15 South East 72
## 447 2020-04-16 South East 56
## 448 2020-04-17 South East 86
## 449 2020-04-18 South East 57
## 450 2020-04-19 South East 69
## 451 2020-04-20 South East 85
## 452 2020-04-21 South East 49
## 453 2020-04-22 South East 54
## 454 2020-04-23 South East 57
## 455 2020-04-24 South East 64
## 456 2020-04-25 South East 50
## 457 2020-04-26 South East 51
## 458 2020-04-27 South East 40
## 459 2020-04-28 South East 40
## 460 2020-04-29 South East 46
## 461 2020-04-30 South East 28
## 462 2020-05-01 South East 37
## 463 2020-05-02 South East 35
## 464 2020-05-03 South East 16
## 465 2020-05-04 South East 34
## 466 2020-05-05 South East 29
## 467 2020-05-06 South East 22
## 468 2020-05-07 South East 25
## 469 2020-05-08 South East 25
## 470 2020-05-09 South East 28
## 471 2020-05-10 South East 19
## 472 2020-05-11 South East 20
## 473 2020-05-12 South East 26
## 474 2020-05-13 South East 16
## 475 2020-05-14 South East 30
## 476 2020-05-15 South East 22
## 477 2020-05-16 South East 15
## 478 2020-05-17 South East 13
## 479 2020-05-18 South East 12
## 480 2020-05-19 South East 0
## 481 2020-03-01 South West 0
## 482 2020-03-02 South West 0
## 483 2020-03-03 South West 0
## 484 2020-03-04 South West 0
## 485 2020-03-05 South West 0
## 486 2020-03-06 South West 0
## 487 2020-03-07 South West 0
## 488 2020-03-08 South West 0
## 489 2020-03-09 South West 0
## 490 2020-03-10 South West 0
## 491 2020-03-11 South West 1
## 492 2020-03-12 South West 0
## 493 2020-03-13 South West 0
## 494 2020-03-14 South West 1
## 495 2020-03-15 South West 0
## 496 2020-03-16 South West 0
## 497 2020-03-17 South West 2
## 498 2020-03-18 South West 2
## 499 2020-03-19 South West 4
## 500 2020-03-20 South West 3
## 501 2020-03-21 South West 6
## 502 2020-03-22 South West 9
## 503 2020-03-23 South West 9
## 504 2020-03-24 South West 7
## 505 2020-03-25 South West 9
## 506 2020-03-26 South West 11
## 507 2020-03-27 South West 14
## 508 2020-03-28 South West 22
## 509 2020-03-29 South West 22
## 510 2020-03-30 South West 28
## 511 2020-03-31 South West 15
## 512 2020-04-01 South West 25
## 513 2020-04-02 South West 21
## 514 2020-04-03 South West 28
## 515 2020-04-04 South West 40
## 516 2020-04-05 South West 32
## 517 2020-04-06 South West 34
## 518 2020-04-07 South West 39
## 519 2020-04-08 South West 47
## 520 2020-04-09 South West 24
## 521 2020-04-10 South West 46
## 522 2020-04-11 South West 43
## 523 2020-04-12 South West 23
## 524 2020-04-13 South West 26
## 525 2020-04-14 South West 24
## 526 2020-04-15 South West 31
## 527 2020-04-16 South West 29
## 528 2020-04-17 South West 32
## 529 2020-04-18 South West 25
## 530 2020-04-19 South West 31
## 531 2020-04-20 South West 26
## 532 2020-04-21 South West 26
## 533 2020-04-22 South West 22
## 534 2020-04-23 South West 17
## 535 2020-04-24 South West 19
## 536 2020-04-25 South West 15
## 537 2020-04-26 South West 26
## 538 2020-04-27 South West 13
## 539 2020-04-28 South West 17
## 540 2020-04-29 South West 14
## 541 2020-04-30 South West 26
## 542 2020-05-01 South West 6
## 543 2020-05-02 South West 6
## 544 2020-05-03 South West 9
## 545 2020-05-04 South West 16
## 546 2020-05-05 South West 14
## 547 2020-05-06 South West 18
## 548 2020-05-07 South West 16
## 549 2020-05-08 South West 5
## 550 2020-05-09 South West 10
## 551 2020-05-10 South West 5
## 552 2020-05-11 South West 7
## 553 2020-05-12 South West 7
## 554 2020-05-13 South West 7
## 555 2020-05-14 South West 6
## 556 2020-05-15 South West 3
## 557 2020-05-16 South West 4
## 558 2020-05-17 South West 6
## 559 2020-05-18 South West 4
## 560 2020-05-19 South West 2

We extract the completion date from the NHS Pathways file timestamp:

The completion date of the NHS Pathways data is Wednesday 20 May 2020.

We add the following variable:

- day: an integer representing the number of days from the earliest data reported, used for modelling purposes; the first day is 0

These are functions which will be used further in the analyses.
Function to estimate the generalised R-squared as the proportion of deviance explained by a given model:
## Generalised R2 for a Poisson model: the share of the null deviance that
## the fitted model accounts for.
## Not adjusted for model complexity but all models have the same DF here.
##
## x: a fitted model object exposing `deviance` and `null.deviance`
## returns: 1 - (deviance / null deviance)
Rsq <- function(x) {
  unexplained_share <- x$deviance / x$null.deviance
  1 - unexplained_share
}

Function to extract growth rates per region as well as halving times, and the associated 95% confidence intervals:
## Extract per-region growth rates, halving times and their 95% confidence
## bounds from a fitted model.
##
## model: a fitted model whose coefficients include a `day` slope and
##   `day:<region>` interaction terms, and whose `xlevels` holds the levels
##   of `nhs_region` (e.g. a glm of the form `n ~ day * nhs_region + ...`)
## returns: a data.frame with one row per region and columns `nhs_region`,
##   `r`, `lower_95`, `upper_95`, `halving_t`, `halving_t_lower_95`,
##   `halving_t_upper_95`
get_r <- function(model) {
  ## extract coefficients and conf int, keeping only the `day` slope terms
  ## (`day_of_week` terms also contain "day" and are dropped first)
  out <- data.frame(r = coef(model)) %>%
    rownames_to_column("var") %>%
    cbind(confint(model)) %>%
    filter(!grepl("day_of_week", var)) %>%
    filter(grepl("day", var)) %>%
    rename(lower_95 = "2.5 %",
           upper_95 = "97.5 %") %>%
    mutate(var = sub("day:", "", var))

  ## reconstruct values: slope of the first row + region-specific offset.
  ## `seq_len(n)[-1]` is empty when there are fewer than two rows, whereas
  ## `2:n` would count backwards and corrupt the first row.
  ## NOTE(review): the confidence bounds are summed in the same way as the
  ## point estimates; this is not in general the confidence interval of the
  ## summed coefficients -- confirm this approximation is intended
  for (i in seq_len(nrow(out))[-1]) {
    out[i, -1] <- out[1, -1] + out[i, -1]
  }

  ## restore regions names from the factor levels stored in the model
  out <- out %>%
    mutate(nhs_region = model$xlevels$nhs_region) %>%
    select(nhs_region, everything(), -var)

  ## find halving times: log(0.5) / r, applied to the estimate and both bounds
  halving <- log(0.5) / out[, -1] %>%
    rename(halving_t = r,
           halving_t_lower_95 = lower_95,
           halving_t_upper_95 = upper_95)

  ## set halving times to NA where the interval for r contains zero
  no_halving <- out$lower_95 < 0 & out$upper_95 > 0
  halving[no_halving, ] <- NA_real_

  ## return all data
  cbind(out, halving)
}

Functions used in the correlation analysis between NHS Pathways reports and deaths:
## Pearson's correlation between deaths and lagged reports, computed on the
## rows of `x` selected by `ndx`. Rows with a missing value in either column
## are left out of the calculation. Note that `pearson` can be replaced with
## `spearman` for rank correlation.
##
## x: data with columns `deaths` and `note_lag`
## ndx: index vector selecting the rows to use
## returns: the correlation coefficient
getcor <- function(x, ndx) {
  selected_deaths <- x$deaths[ndx]
  selected_reports <- x$note_lag[ndx]
  cor(selected_deaths,
      selected_reports,
      method = "pearson",
      use = "complete.obs")
}
## Catch if sample size throws an error:
## `getcor2` is `getcor` wrapped with `possibly()`, so that an error raised
## by `getcor` yields NA instead of stopping the caller
getcor2 <- possibly(getcor, otherwise = NA)
## Bootstrap the correlation between deaths and lagged reports.
##
## x: data with columns `deaths` and `note_lag`
## returns: a one-row data.frame with
##   n     - number of rows where both `note_lag` and `deaths` are present
##   r     - correlation on the original data (`t0` of the bootstrap result)
##   r_low - lower end of the BCa interval returned by `boot::boot.ci()`
##   r_hi  - upper end of that interval
getboot <- function(x) {
  ## 1000 bootstrap replicates of the error-tolerant correlation
  replicates <- boot::boot(x, getcor2, R = 1000)
  result <- boot::boot.ci(replicates, type = "bca")

  n_complete <- sum(!is.na(x$note_lag) & !is.na(x$deaths))

  ## elements 4 and 5 of `bca` hold the interval end points
  data.frame(n = n_complete,
             r = result$t0,
             r_low = result$bca[4],
             r_hi = result$bca[5])
}

Function to classify the day of the week into weekend, Monday, and the rest:
## Fn to add day of week
##
## Adds a factor column `day_of_week` to `df`, derived from its `date`
## column, with levels "rest_of_week" (reference), "monday" and "weekend".
##
## df: a data frame with a `date` column
## returns: `df` with the extra `day_of_week` column
day_of_week <- function(df) {
  df %>%
    ## Numeric weekday with Monday = 1, ..., Sunday = 7. Text labels from
    ## `label = TRUE` follow the LC_TIME locale, so matching on "Sat", "Sun"
    ## and "Mon" would put every day in "rest_of_week" under a non-English
    ## locale.
    dplyr::mutate(day_of_week = lubridate::wday(date, week_start = 1)) %>%
    dplyr::mutate(day_of_week = dplyr::case_when(
      day_of_week %in% c(6, 7) ~ "weekend",
      day_of_week %in% 1 ~ "monday",
      ## everything else, including missing dates, as before
      TRUE ~ "rest_of_week"
    ) %>%
      factor(levels = c("rest_of_week", "monday", "weekend")))
}

Custom color palettes, color scales, and vectors of colors:
We look for temporal patterns in COVID-19 related 111/999 calls and 111 online reports. Analyses are broken down by NHS region. We also look for estimates of recent growth rate and associated doubling / halving time.
## Daily number of NHS Pathways reports per NHS region
## (records without a region are dropped)
tab_date_region_all <- x %>%
  filter(!is.na(nhs_region)) %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

## Stack deaths and reports in one long table (one facet each). Every row
## also carries the position and text of the vertical marker drawn in its
## facet.
dths_reports <- dth %>%
  mutate(
    ## deaths reported within the last `delay_max` days are flagged "N" so
    ## that they can be faded out in the plot
    trusted = case_when(
      date_report < max(dth$date_report) - delay_max ~ "Y",
      date_report >= max(dth$date_report) - delay_max ~ "N"
    ),
    value = "Deaths",
    vline = max(dth$date_report) - delay_max - 1,
    lab = "Truncated for reporting delay",
    lab_pos_x = vline + 8,
    lab_pos_y = 150,
    lab_col = "darkgrey"
  ) %>%
  rename(date = date_report,
         n = deaths) %>%
  bind_rows(
    mutate(tab_date_region_all,
           value = "Reports",
           trusted = "Y",
           vline = as.Date("2020-03-23"),
           lab = "Start of UK lockdown",
           lab_pos_x = vline - 6,
           lab_pos_y = 30000,
           lab_col = "black")
  ) %>%
  mutate(value = factor(value, levels = c("Reports", "Deaths")))

plot_dth_report <-
  ggplot(dths_reports, aes(date, n, colour = nhs_region)) +
  # Add main points and lines, coloured by region and fade out deaths for excluded period
  geom_point(aes(alpha = trusted)) +
  geom_line(alpha = 0.2) +
  geom_smooth(method = "loess", span = .5, color = "black") +
  scale_colour_manual("", values = pal) +
  scale_alpha_manual(values = c(0.3, 1)) +
  # "none" hides the legend; `F` is an ordinary, reassignable variable
  guides(alpha = "none") +
  # Add vertical markers for important dates with labels - different for each facet
  ggnewscale::new_scale_colour() +
  geom_vline(aes(xintercept = vline, col = value), lty = "solid") +
  geom_text(aes(x = lab_pos_x, y = lab_pos_y, label = lab, col = value), size = 3) +
  scale_colour_manual("", values = c("black", "darkgrey"), guide = "none") +
  # Facet by deaths and reports
  facet_grid(rows = vars(value), scales = "free_y", switch = "y") +
  # Other formatting
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom", strip.placement = "outside") +
  rotate_x +
  labs(x = NULL,
       y = NULL)

plot_dth_report

We plot the number of 111/999 calls and 111 online reports by age, and the proportion of 111/999 calls and 111 online reports by age. In the second graph, the vertical lines indicate the proportion of individuals residing in the corresponding NHS region who belong to the corresponding age group.
## Daily reports per NHS region and age group; records with no region or
## with age recorded as "missing" are left out
tab_date_region_age_all <- x %>%
  filter(!is.na(nhs_region) & age != "missing") %>%
  group_by(date, nhs_region, age) %>%
  summarise(n = sum(count))

## Stacked bars: absolute number of daily reports by age, one panel per region
ggplot(tab_date_region_age_all, aes(x = date, y = n, fill = age)) +
  geom_col(position = "stack") +
  facet_wrap(~ nhs_region, ncol = 4) +
  scale_fill_manual(values = age.pal) +
  guides(fill = guide_legend(title = "Age", ncol = 3)) +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = NULL,
       y = "Total daily reports by age")

## Add the daily total per region (`tot`) and the share of each age group
## in that total (`prop_n`)
tab_date_region_age_all <- left_join(
  summarise(group_by(tab_date_region_age_all, date, nhs_region), tot = sum(n)),
  tab_date_region_age_all,
  by = c("date", "nhs_region")
) %>%
  mutate(prop_n = n / tot)

## Lines: share of daily reports by age; the horizontal lines come from
## `nhs_region_pop` (column `value`, coloured by `variable`)
ggplot(tab_date_region_age_all, aes(x = date, y = prop_n, color = age)) +
  geom_line() +
  geom_point() +
  geom_hline(data = nhs_region_pop, aes(yintercept = value, color = variable)) +
  facet_wrap(~ nhs_region, ncol = 4) +
  scale_color_manual(values = age.pal) +
  guides(color = guide_legend(title = "Age", ncol = 3)) +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = NULL,
       y = "Proportion of daily reports by age")

We fit quasi-Poisson GLMs for 14-day windows to get growth rates over time.
## set moving time window (1/2/3 weeks)
w <- 14

## make data for model: daily report totals per NHS region
x_model_all_moving <- x %>%
  filter(!is.na(nhs_region)) %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

unique_dates <- unique(x_model_all_moving$date)

## Number of window start dates. With `w` or fewer dates the original
## `1:(length(unique_dates) - w)` counted backwards; fail early instead.
n_windows <- max(length(unique_dates) - w, 0L)
if (n_windows == 0L) {
  stop("Need more than ", w, " distinct dates to fit sliding-window models",
       call. = FALSE)
}

## one slot per window, combined once after the loop rather than growing a
## data frame on every iteration
r_by_window <- vector("list", n_windows)

## NOTE(review): if the dates are consecutive days, the last window ends the
## day before the most recent date, so that date is never modelled --
## confirm this is intended
for (i in seq_len(n_windows)) {

  ## window covers date_i (inclusive) to date_i + w (exclusive)
  date_i <- unique_dates[i]
  date_i_max <- date_i + w

  model_data <- x_model_all_moving %>%
    filter(date >= date_i & date < date_i_max) %>%
    mutate(day = as.integer(date - date_i)) %>%
    day_of_week()

  mod <- glm(n ~ day * nhs_region + day_of_week,
             data = model_data,
             family = "quasipoisson")

  # get growth rate
  r <- get_r(mod)
  r$w_min <- date_i
  r$w_max <- date_i_max

  r_by_window[[i]] <- r
}

# combine all estimates
r_all_sliding <- bind_rows(r_by_window)

# serial interval distribution: gamma with mean 4.7 and coefficient of
# variation 2.9 / 4.7, discretised on a unit interval
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete("gamma", interval = 1,
                                        shape = SI_param$shape,
                                        scale = SI_param$scale,
                                        w = 0.5)

# convert growth rates r to R0
r_all_sliding <- r_all_sliding %>%
  mutate(R = epitrix::r2R0(r, SI_distribution),
         R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
         R_upper_95 = epitrix::r2R0(upper_95, SI_distribution))

We examine the evolution of the growth rate by region over time.
# plot: estimated daily growth rate per region against the end date of each
# window, with a ribbon for the 95% bounds and a dashed line at r = 0
plot_growth <-
  r_all_sliding %>%
  ggplot(aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region), alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(0.5, 1, 0.5, 0.5, "cm")) +
  # "none" hides the fill legend; a logical value here is deprecated in
  # current ggplot2
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = "none") +
  labs(x = "",
       y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

From the growth rate, we derive R and examine its value through time.
# plot: estimated reproduction number per region against the end date of
# each window, with a ribbon for the 95% bounds and a dashed line at R = 1
plot_R <-
  r_all_sliding %>%
  ggplot(aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region), alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(0.5, 1, 0.5, 0.5, "cm")) +
  # "none" hides the fill legend; a logical value here is deprecated in
  # current ggplot2
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = "none") +
  labs(x = "",
       y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)

# Long table for a two-panel figure: the R rows reuse the generic
# `value` / `lower_95` / `upper_95` columns so both measures share one set
# of aesthetics; `reference` is the threshold drawn in each panel
R <- r_all_sliding %>%
  mutate(lower_95 = R_lower_95,
         upper_95 = R_upper_95,
         value = R,
         measure = "R",
         reference = 1)

r_R <- r_all_sliding %>%
  mutate(measure = "r",
         value = r,
         reference = 0) %>%
  bind_rows(R)

r_R %>%
  ggplot(aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region), alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(0.5, 1, 0, 0, "cm"),
        strip.background = element_blank(),
        # strip.text.x = element_blank(),
        strip.placement = "outside"
        ) +
  guides(color = guide_legend(title = "",
                              override.aes = list(fill = NA)),
         fill = "none") +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))

We repeat the above analysis, where we fit quasi-Poisson GLMs for 14-day windows to get growth rates over time, but apply this to each age group separately (0-18, 19-69, 70-120 years old).
We first run the analysis for 0-18 years old.
## set moving time window (2 weeks)
w <- 14
# create empty df
r_all_sliding_0_18 <- NULL
## make data for model
x_model_all_moving_0_18 <- x %>%
filter(!is.na(nhs_region),
age == "0-18") %>%
group_by(date, nhs_region) %>%
summarise(n = sum(count))
unique_dates <- unique(x_model_all_moving_0_18$date)
for (i in 1:(length(unique_dates) - w)) {
date_i <- unique_dates[i]
date_i_max <- date_i + w
model_data <- x_model_all_moving_0_18 %>%
filter(date >= date_i & date < date_i_max) %>%
mutate(day = as.integer(date - date_i)) %>%
day_of_week()
mod <- glm(n ~ day * nhs_region + day_of_week,
data = model_data,
family = 'quasipoisson')
# get growth rate
r <- get_r(mod)
r$w_min <- date_i
r$w_max <- date_i_max
# combine all estimates
r_all_sliding_0_18 <- bind_rows(r_all_sliding_0_18, r)
}
# serial interval distribution: discretised gamma built from the shape and
# scale returned by epitrix for the inputs 4.7 and 2.9 / 4.7
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete(
  "gamma",
  interval = 1,
  shape = SI_param$shape,
  scale = SI_param$scale,
  w = 0.5
)

# convert growth rates r (point estimate and both interval bounds) to R0
r_all_sliding_0_18 <- mutate(
  r_all_sliding_0_18,
  R = epitrix::r2R0(r, SI_distribution),
  R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
  R_upper_95 = epitrix::r2R0(upper_95, SI_distribution)
)
# plot
# growth rate over time for 0-18 year olds, one line and ribbon per NHS
# region; the dashed line at 0 separates growth from decline
plot_growth <- ggplot(r_all_sliding_0_18, aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

# same layout for the reproduction number; the dashed line sits at 1
plot_R <- ggplot(r_all_sliding_0_18, aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)
## Stack the R and r estimates for 0-18 year olds in long format, sharing the
## columns `value`, `lower_95`, `upper_95` and the `reference` level used for
## the dashed line (1 for R, 0 for r)
R <- mutate(r_all_sliding_0_18,
            lower_95 = R_lower_95,
            upper_95 = R_upper_95,
            value = R,
            measure = "R",
            reference = 1)
r_R <- bind_rows(
  mutate(r_all_sliding_0_18,
         measure = "r",
         value = r,
         reference = 0),
  R
)

## One facet row per measure, with free y scales and strip labels placed
## outside the panels so they read as axis titles
fig2_3_0_18 <- ggplot(r_R, aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0, l = 0, unit = "cm"),
        strip.background = element_blank(),
        strip.placement = "outside") +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))
Then, we run the analysis for 19-69 years old.
## set moving time window (2 weeks)
w <- 14

## make data for model: daily totals per NHS region, 19-69 age group only
x_model_all_moving_19_69 <- x %>%
  filter(!is.na(nhs_region),
         age == "19-69") %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

unique_dates <- unique(x_model_all_moving_19_69$date)

## number of window start dates; `1:(length(unique_dates) - w)` would iterate
## over c(1, 0) or negative indices when there are too few dates, so fail
## early with a clear message instead
n_windows <- length(unique_dates) - w
if (n_windows < 1L) {
  stop("need more than ", w, " distinct dates to fit a ", w,
       "-day moving window", call. = FALSE)
}

## one slot per window, filled in the loop and bound once at the end
## (avoids re-copying a growing data frame on every iteration)
r_by_window <- vector("list", n_windows)

## NOTE(review): windows are half-open, [date_i, date_i + w); if the dates are
## consecutive days the most recent date never falls inside any window.
## Confirm this is intended.
for (i in seq_len(n_windows)) {
  date_i <- unique_dates[i]
  date_i_max <- date_i + w

  model_data <- x_model_all_moving_19_69 %>%
    filter(date >= date_i & date < date_i_max) %>%
    mutate(day = as.integer(date - date_i)) %>%
    day_of_week()

  mod <- glm(n ~ day * nhs_region + day_of_week,
             data = model_data,
             family = "quasipoisson")

  ## get growth rate and tag it with the window bounds
  r <- get_r(mod)
  r$w_min <- date_i
  r$w_max <- date_i_max

  r_by_window[[i]] <- r
}

## combine all estimates
r_all_sliding_19_69 <- bind_rows(r_by_window)
# serial interval distribution: discretised gamma built from the shape and
# scale returned by epitrix for the inputs 4.7 and 2.9 / 4.7
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete(
  "gamma",
  interval = 1,
  shape = SI_param$shape,
  scale = SI_param$scale,
  w = 0.5
)

# convert growth rates r (point estimate and both interval bounds) to R0
r_all_sliding_19_69 <- mutate(
  r_all_sliding_19_69,
  R = epitrix::r2R0(r, SI_distribution),
  R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
  R_upper_95 = epitrix::r2R0(upper_95, SI_distribution)
)
# plot
# growth rate over time for 19-69 year olds, one line and ribbon per NHS
# region; the dashed line at 0 separates growth from decline
plot_growth <- ggplot(r_all_sliding_19_69, aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

# same layout for the reproduction number; the dashed line sits at 1
plot_R <- ggplot(r_all_sliding_19_69, aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)
## Stack the R and r estimates for 19-69 year olds in long format, sharing the
## columns `value`, `lower_95`, `upper_95` and the `reference` level used for
## the dashed line (1 for R, 0 for r)
R <- mutate(r_all_sliding_19_69,
            lower_95 = R_lower_95,
            upper_95 = R_upper_95,
            value = R,
            measure = "R",
            reference = 1)
r_R <- bind_rows(
  mutate(r_all_sliding_19_69,
         measure = "r",
         value = r,
         reference = 0),
  R
)

## One facet row per measure, with free y scales and strip labels placed
## outside the panels so they read as axis titles
fig2_3_19_69 <- ggplot(r_R, aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0, l = 0, unit = "cm"),
        strip.background = element_blank(),
        strip.placement = "outside") +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))
Finally, we run the analysis for 70-120 years old.
## set moving time window (2 weeks)
w <- 14

## make data for model: daily totals per NHS region, 70-120 age group only
x_model_all_moving_70_120 <- x %>%
  filter(!is.na(nhs_region),
         age == "70-120") %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

unique_dates <- unique(x_model_all_moving_70_120$date)

## number of window start dates; `1:(length(unique_dates) - w)` would iterate
## over c(1, 0) or negative indices when there are too few dates, so fail
## early with a clear message instead
n_windows <- length(unique_dates) - w
if (n_windows < 1L) {
  stop("need more than ", w, " distinct dates to fit a ", w,
       "-day moving window", call. = FALSE)
}

## one slot per window, filled in the loop and bound once at the end
## (avoids re-copying a growing data frame on every iteration)
r_by_window <- vector("list", n_windows)

## NOTE(review): windows are half-open, [date_i, date_i + w); if the dates are
## consecutive days the most recent date never falls inside any window.
## Confirm this is intended.
for (i in seq_len(n_windows)) {
  date_i <- unique_dates[i]
  date_i_max <- date_i + w

  model_data <- x_model_all_moving_70_120 %>%
    filter(date >= date_i & date < date_i_max) %>%
    mutate(day = as.integer(date - date_i)) %>%
    day_of_week()

  mod <- glm(n ~ day * nhs_region + day_of_week,
             data = model_data,
             family = "quasipoisson")

  ## get growth rate and tag it with the window bounds
  r <- get_r(mod)
  r$w_min <- date_i
  r$w_max <- date_i_max

  r_by_window[[i]] <- r
}

## combine all estimates
r_all_sliding_70_120 <- bind_rows(r_by_window)
# serial interval distribution: discretised gamma built from the shape and
# scale returned by epitrix for the inputs 4.7 and 2.9 / 4.7
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete(
  "gamma",
  interval = 1,
  shape = SI_param$shape,
  scale = SI_param$scale,
  w = 0.5
)

# convert growth rates r (point estimate and both interval bounds) to R0
r_all_sliding_70_120 <- mutate(
  r_all_sliding_70_120,
  R = epitrix::r2R0(r, SI_distribution),
  R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
  R_upper_95 = epitrix::r2R0(upper_95, SI_distribution)
)
# plot
# growth rate over time for 70-120 year olds, one line and ribbon per NHS
# region; the dashed line at 0 separates growth from decline
plot_growth <- ggplot(r_all_sliding_70_120, aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

# same layout for the reproduction number; the dashed line sits at 1
plot_R <- ggplot(r_all_sliding_70_120, aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)
## Stack the R and r estimates for 70-120 year olds in long format, sharing
## the columns `value`, `lower_95`, `upper_95` and the `reference` level used
## for the dashed line (1 for R, 0 for r)
R <- mutate(r_all_sliding_70_120,
            lower_95 = R_lower_95,
            upper_95 = R_upper_95,
            value = R,
            measure = "R",
            reference = 1)
r_R <- bind_rows(
  mutate(r_all_sliding_70_120,
         measure = "r",
         value = r,
         reference = 0),
  R
)

## One facet row per measure, with free y scales and strip labels placed
## outside the panels so they read as axis titles
fig2_3_70_120 <- ggplot(r_R, aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0, l = 0, unit = "cm"),
        strip.background = element_blank(),
        strip.placement = "outside") +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))
We combine the estimated growth rates and effective reproduction numbers into a single figure.
## stack the three age-group figures vertically (panels labelled
## automatically) with one shared legend underneath
ggpubr::ggarrange(
  plotlist = list(fig2_3_0_18, fig2_3_19_69, fig2_3_70_120),
  nrow = 3,
  labels = "AUTO",
  common.legend = TRUE,
  legend = "bottom",
  align = "hv"
)
We want to explore the correlation between NHS Pathways reports and deaths, and assess the potential for reports to be used as an early warning system for disease resurgence.
Death data are publicly available. We truncate the time series to avoid bias from reporting delay - we assume a conservative delay of three weeks.
We calculate Pearson’s correlation coefficient between deaths and NHS Pathways notifications using different lags. Confidence intervals are obtained using bootstrap. Note that results were also confirmed using Spearman’s rank correlation.
First we join the NHS Pathways and death data, and aggregate over all England:
## truncate death data for reporting delay: drop the most recent `delay_max`
## days of reports
trunc_date <- max(dth$date_report) - delay_max
dth_trunc <- dth %>%
  rename(date = date_report) %>%
  filter(date <= trunc_date)

## join with notification data: daily report totals per NHS region, keeping
## only the date/region pairs present in both datasets
all_data <- x %>%
  filter(!is.na(nhs_region)) %>%
  group_by(date, nhs_region) %>%
  summarise(count = sum(count, na.rm = TRUE)) %>%
  ungroup() %>%
  inner_join(dth_trunc,
             by = c("date", "nhs_region"))

## aggregate reports and deaths over all regions, one row per date
all_tot <- all_data %>%
  group_by(date) %>%
  summarise(count = sum(count, na.rm = TRUE),
            deaths = sum(deaths, na.rm = TRUE))
We calculate correlation with lagged NHS Pathways reports from 0 to 30 days behind deaths:
## Calculate all correlations + bootstrap CIs, one result per lag in days
lags <- 0:30

## one slot per lag, bound once at the end rather than growing `lag_cor`
## inside the loop
lag_results <- vector("list", length(lags))

for (k in seq_along(lags)) {
  lag_results[[k]] <- all_tot %>%
    ## lag reports
    mutate(note_lag = lag(count, lags[k])) %>%
    ## calculate correlation and bootstrap CI with the project helper
    ## getboot() (the accompanying text describes this as Pearson's)
    getboot() %>%
    mutate(lag = lags[k])
}

## seeded with an empty data.frame, as the original accumulator was
lag_cor <- bind_rows(data.frame(), lag_results)
## correlation against lag, with the bootstrap interval as a ribbon and a
## dashed reference line at zero
cor_vs_lag <- lag_cor %>%
  ggplot(aes(x = lag, y = r)) +
  theme_bw() +
  geom_ribbon(aes(ymin = r_low, ymax = r_hi), alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "longdash") +
  geom_point() +
  geom_line() +
  labs(
    x = "Lag between NHS pathways and death data (days)",
    y = "Pearson's correlation"
  ) +
  large_txt
cor_vs_lag
This analysis suggests that the best lag is 16 days. We then compare and plot the number of deaths reported against the number of NHS Pathways reports lagged by 16 days.
## lag (in days) selected from the correlation analysis; `l_opt` indexes the
## chosen row of `lag_cor` and is defined outside this chunk
best_lag <- lag_cor$lag[l_opt]

all_tot <- all_tot %>%
  rename(date_death = date) %>%
  mutate(note_lag = lag(count, best_lag),
         note_lag_c = (note_lag - mean(note_lag, na.rm = TRUE)),
         ## date of the lagged reports; uses the same lag as `note_lag`
         ## (previously hard-coded to 16, which only matched when the
         ## selected lag happened to be 16)
         date_note = lag(date_death, best_lag))

## quasi-Poisson regression of daily deaths on lagged daily reports
lag_mod <- glm(deaths ~ note_lag, data = all_tot, family = "quasipoisson")
summary(lag_mod)
##
## Call:
## glm(formula = deaths ~ note_lag, family = "quasipoisson", data = all_tot)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.351 -1.821 0.078 1.400 4.487
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.797e+00 5.064e-02 114.47 < 2e-16 ***
## note_lag 6.286e-06 4.641e-07 13.54 9.81e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 5.144602)
##
## Null deviance: 1082.51 on 25 degrees of freedom
## Residual deviance: 122.85 on 24 degrees of freedom
## (16 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
exp(coefficients(lag_mod))
## (Intercept) note_lag
## 329.277611 1.000006
exp(confint(lag_mod))
## 2.5 % 97.5 %
## (Intercept) 297.952542 363.381713
## note_lag 1.000005 1.000007
Rsq(lag_mod)
## [1] 0.8865142
## fitted values and standard errors on the link (log) scale
link_pred <- predict(lag_mod, type = "link", se.fit = TRUE)
mod_fit <- as.data.frame(link_pred[1:2])

## attach predictions to the rows that have a lagged report count, and build
## a +/- 1.96 SE band back-transformed to the response scale
all_tot_pred <- filter(all_tot, !is.na(note_lag)) %>%
  mutate(
    pred = mod_fit$fit,
    pred.se = mod_fit$se.fit,
    low = exp(pred - 1.96 * pred.se),
    hi = exp(pred + 1.96 * pred.se)
  )
## observed deaths against lagged reports, with the fitted curve and its band
glm_fit <- ggplot(filter(all_tot_pred, !is.na(note_lag)),
                  aes(x = note_lag, y = deaths)) +
  geom_point() +
  geom_line(aes(y = exp(pred))) +
  geom_ribbon(aes(ymin = low, ymax = hi), alpha = 0.3, col = "grey") +
  theme_bw() +
  labs(
    y = "Daily number of\ndeaths reported",
    x = "Daily number of NHS Pathways reports"
  ) +
  large_txt
glm_fit
This is a comparison of gamma versus lognormal distribution for the serial interval used to convert r to R in our analysis. Both distributions are parameterised with mean 4.7 and standard deviation 2.9.
## target moments of the serial interval (days), shared by both distributions
si_mean <- 4.7
si_sd <- 2.9

## discretised gamma, built from the mean and coefficient of variation
SI_param <- epitrix::gamma_mucv2shapescale(si_mean, si_sd / si_mean)
SI_distribution <- distcrete::distcrete("gamma", interval = 1,
                                        shape = SI_param$shape,
                                        scale = SI_param$scale, w = 0.5)

## discretised lognormal with the same mean and sd. `meanlog` / `sdlog` are
## moments of log(X), not of X, so log(mean) and log(sd) do not give the
## intended distribution. Moment matching gives
##   sdlog^2 = log(1 + (sd / mean)^2)
##   meanlog = log(mean) - sdlog^2 / 2
si_sdlog <- sqrt(log(1 + (si_sd / si_mean)^2))
si_meanlog <- log(si_mean) - si_sdlog^2 / 2
SI_distribution2 <- distcrete::distcrete("lnorm", interval = 1,
                                         meanlog = si_meanlog,
                                         sdlog = si_sdlog, w = 0.5)
## histogram of 1e5 draws from the discretised gamma serial interval
SI_dist1 <- data.frame(x = SI_distribution$r(1e5)) %>%
  count(x) %>%
  ggplot() +
  geom_col(aes(x = x, y = n)) +
  labs(x = "Serial interval (days)", y = "Frequency") +
  scale_x_continuous(breaks = seq(0, 30, 5)) +
  theme_bw()

## same for the lognormal serial interval, drawn on a wider, fixed x range
SI_dist2 <- data.frame(x = SI_distribution2$r(1e5)) %>%
  count(x) %>%
  ggplot() +
  geom_col(aes(x = x, y = n)) +
  labs(x = "Serial interval (days)", y = "Frequency") +
  scale_x_continuous(breaks = seq(0, 200, 20), limits = c(0, 200)) +
  theme_bw()

## side-by-side panels, labelled automatically
ggpubr::ggarrange(
  plotlist = list(SI_dist1, SI_dist2),
  nrow = 1,
  labels = "AUTO"
)
We reproduce the window analysis with either a 7 or 21 days window for sensitivity purposes.
First with the 7 days window:
## set moving time window (1/2/3 weeks)
w <- 7

## make data for model: daily totals per NHS region, all ages
x_model_all_moving <- x %>%
  filter(!is.na(nhs_region)) %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

unique_dates <- unique(x_model_all_moving$date)

## number of window start dates; `1:(length(unique_dates) - w)` would iterate
## over c(1, 0) or negative indices when there are too few dates, so fail
## early with a clear message instead
n_windows <- length(unique_dates) - w
if (n_windows < 1L) {
  stop("need more than ", w, " distinct dates to fit a ", w,
       "-day moving window", call. = FALSE)
}

## one slot per window, filled in the loop and bound once at the end
## (avoids re-copying a growing data frame on every iteration)
r_by_window <- vector("list", n_windows)

## NOTE(review): windows are half-open, [date_i, date_i + w); if the dates are
## consecutive days the most recent date never falls inside any window.
## Confirm this is intended.
for (i in seq_len(n_windows)) {
  date_i <- unique_dates[i]
  date_i_max <- date_i + w

  model_data <- x_model_all_moving %>%
    filter(date >= date_i & date < date_i_max) %>%
    mutate(day = as.integer(date - date_i)) %>%
    day_of_week()

  mod <- glm(n ~ day * nhs_region + day_of_week,
             data = model_data,
             family = "quasipoisson")

  ## get growth rate and tag it with the window bounds
  r <- get_r(mod)
  r$w_min <- date_i
  r$w_max <- date_i_max

  r_by_window[[i]] <- r
}

## combine all estimates
r_all_sliding_7days <- bind_rows(r_by_window)
# serial interval distribution: discretised gamma built from the shape and
# scale returned by epitrix for the inputs 4.7 and 2.9 / 4.7
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete(
  "gamma",
  interval = 1,
  shape = SI_param$shape,
  scale = SI_param$scale,
  w = 0.5
)

# convert growth rates r (point estimate and both interval bounds) to R0
r_all_sliding_7days <- mutate(
  r_all_sliding_7days,
  R = epitrix::r2R0(r, SI_distribution),
  R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
  R_upper_95 = epitrix::r2R0(upper_95, SI_distribution)
)
# plot
# growth rate over time from the 7-day windows, one line and ribbon per NHS
# region; the dashed line at 0 separates growth from decline
plot_growth <- ggplot(r_all_sliding_7days, aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

# same layout for the reproduction number; the dashed line sits at 1
plot_R <- ggplot(r_all_sliding_7days, aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)
## Stack the R and r estimates from the 7-day windows in long format, sharing
## the columns `value`, `lower_95`, `upper_95` and the `reference` level used
## for the dashed line (1 for R, 0 for r)
R <- mutate(r_all_sliding_7days,
            lower_95 = R_lower_95,
            upper_95 = R_upper_95,
            value = R,
            measure = "R",
            reference = 1)
r_R <- bind_rows(
  mutate(r_all_sliding_7days,
         measure = "r",
         value = r,
         reference = 0),
  R
)

## One facet row per measure, with free y scales and strip labels placed
## outside the panels so they read as axis titles
r_R_7 <- ggplot(r_R, aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0, l = 0, unit = "cm"),
        strip.background = element_blank(),
        strip.placement = "outside") +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))
Then with the 21 days window:
## set moving time window (1/2/3 weeks)
w <- 21

## make data for model: daily totals per NHS region, all ages
x_model_all_moving <- x %>%
  filter(!is.na(nhs_region)) %>%
  group_by(date, nhs_region) %>%
  summarise(n = sum(count))

unique_dates <- unique(x_model_all_moving$date)

## number of window start dates; `1:(length(unique_dates) - w)` would iterate
## over c(1, 0) or negative indices when there are too few dates, so fail
## early with a clear message instead
n_windows <- length(unique_dates) - w
if (n_windows < 1L) {
  stop("need more than ", w, " distinct dates to fit a ", w,
       "-day moving window", call. = FALSE)
}

## one slot per window, filled in the loop and bound once at the end
## (avoids re-copying a growing data frame on every iteration)
r_by_window <- vector("list", n_windows)

## NOTE(review): windows are half-open, [date_i, date_i + w); if the dates are
## consecutive days the most recent date never falls inside any window.
## Confirm this is intended.
for (i in seq_len(n_windows)) {
  date_i <- unique_dates[i]
  date_i_max <- date_i + w

  model_data <- x_model_all_moving %>%
    filter(date >= date_i & date < date_i_max) %>%
    mutate(day = as.integer(date - date_i)) %>%
    day_of_week()

  mod <- glm(n ~ day * nhs_region + day_of_week,
             data = model_data,
             family = "quasipoisson")

  ## get growth rate and tag it with the window bounds
  r <- get_r(mod)
  r$w_min <- date_i
  r$w_max <- date_i_max

  r_by_window[[i]] <- r
}

## combine all estimates
r_all_sliding_21days <- bind_rows(r_by_window)
# serial interval distribution: discretised gamma built from the shape and
# scale returned by epitrix for the inputs 4.7 and 2.9 / 4.7
SI_param <- epitrix::gamma_mucv2shapescale(4.7, 2.9 / 4.7)
SI_distribution <- distcrete::distcrete(
  "gamma",
  interval = 1,
  shape = SI_param$shape,
  scale = SI_param$scale,
  w = 0.5
)

# convert growth rates r (point estimate and both interval bounds) to R0
r_all_sliding_21days <- mutate(
  r_all_sliding_21days,
  R = epitrix::r2R0(r, SI_distribution),
  R_lower_95 = epitrix::r2R0(lower_95, SI_distribution),
  R_upper_95 = epitrix::r2R0(upper_95, SI_distribution)
)
# plot
# growth rate over time from the 21-day windows, one line and ribbon per NHS
# region; the dashed line at 0 separates growth from decline
plot_growth <- ggplot(r_all_sliding_21days, aes(x = w_max, y = r)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(colour = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated daily growth rate (r)") +
  scale_colour_manual(values = pal)

# same layout for the reproduction number; the dashed line sits at 1
plot_R <- ggplot(r_all_sliding_21days, aes(x = w_max, y = R)) +
  geom_ribbon(aes(ymin = R_lower_95, ymax = R_upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0.5, l = 0.5, unit = "cm")) +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "Estimated effective reproduction\nnumber (Re)") +
  scale_colour_manual(values = pal)
## Stack the R and r estimates from the 21-day windows in long format, sharing
## the columns `value`, `lower_95`, `upper_95` and the `reference` level used
## for the dashed line (1 for R, 0 for r)
R <- mutate(r_all_sliding_21days,
            lower_95 = R_lower_95,
            upper_95 = R_upper_95,
            value = R,
            measure = "R",
            reference = 1)
r_R <- bind_rows(
  mutate(r_all_sliding_21days,
         measure = "r",
         value = r,
         reference = 0),
  R
)

## One facet row per measure, with free y scales and strip labels placed
## outside the panels so they read as axis titles
r_R_21 <- ggplot(r_R, aes(x = w_max, y = value)) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95, fill = nhs_region),
              alpha = 0.1) +
  geom_line(aes(colour = nhs_region)) +
  geom_point(aes(colour = nhs_region)) +
  geom_hline(aes(yintercept = reference), linetype = "dashed") +
  theme_bw() +
  scale_weeks +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 0.5, r = 1, b = 0, l = 0, unit = "cm"),
        strip.background = element_blank(),
        strip.placement = "outside") +
  guides(color = guide_legend(title = "", override.aes = list(fill = NA)),
         fill = FALSE) +
  labs(x = "", y = "") +
  scale_colour_manual(values = pal) +
  facet_grid(rows = vars(measure),
             scales = "free_y",
             switch = "y",
             labeller = as_labeller(c(r = "Daily growth rate (r)",
                                      R = "Effective reproduction\nnumber (Re)")))
And we combine both outputs into a single plot:
## 7-day figure above the 21-day figure (panels labelled automatically) with
## one shared legend underneath
ggpubr::ggarrange(
  plotlist = list(r_R_7, r_R_21),
  nrow = 2,
  labels = "AUTO",
  common.legend = TRUE,
  legend = "bottom"
)
## Correlation between deaths and lagged reports, by NHS region, for lags of
## 0 to 30 days
lags <- 0:30

## one slot per lag, bound once at the end rather than growing `lag_cor_reg`
## inside the loop
lag_results_reg <- vector("list", length(lags))

for (k in seq_along(lags)) {
  lag_results_reg[[k]] <- all_data %>%
    group_by(nhs_region) %>%
    ## lag reports within each region
    mutate(note_lag = lag(count, lags[k])) %>%
    ## calculate correlation and bootstrap CI for each region with the
    ## project helper getboot() (the plot labels this as Pearson's)
    group_modify(~getboot(.x)) %>%
    mutate(lag = lags[k])
}

## seeded with an empty data.frame, as the original accumulator was
lag_cor_reg <- bind_rows(data.frame(), lag_results_reg)
## correlation against lag, one panel per NHS region, with the bootstrap
## interval as a ribbon and a dashed reference line at zero
cor_vs_lag_reg <-
  lag_cor_reg %>%
  ggplot(aes(lag, r, col = nhs_region)) +
  geom_hline(yintercept = 0, lty = "longdash") +
  ## `col = NULL` stops the ribbon inheriting the colour aesthetic, so it is
  ## drawn as a fill only
  geom_ribbon(aes(ymin = r_low, ymax = r_hi, col = NULL, fill = nhs_region),
              alpha = 0.2) +
  geom_point() +
  geom_line() +
  facet_wrap(~nhs_region) +
  scale_color_manual(values = pal) +
  ## `FALSE` spelled out: `F` is an ordinary variable that can be reassigned
  scale_fill_manual(values = pal, guide = FALSE) +
  theme_bw() +
  labs(x = "Lag between NHS pathways and death data (days)",
       y = "Pearson's correlation",
       col = "NHS region") +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(fill = NA)))
cor_vs_lag_reg
We save the tables created during our analysis:
## make sure the output folder exists
if (!dir.exists("excel_tables")) {
  dir.create("excel_tables")
}

## list all tables, and loop over export: each object is looked up by name
## and written to excel_tables/<name>.xlsx
tables_to_export <- c("r_all_sliding", "lag_cor")
for (e in tables_to_export) {
  rio::export(
    get(e),
    file.path("excel_tables", paste0(e, ".xlsx"))
  )
}

## also export result from regression on lagged data
rio::export(
  lag_mod,
  file.path("excel_tables", "lag_mod.rds")
)
The following information documents the system on which the document was compiled.
This provides information on the operating system.
This provides information on the version of R used:
This provides information on the packages used:
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggnewscale_0.4.1 ggpubr_0.3.0 lubridate_1.7.8
## [4] chngpt_2020.5-13 cyphr_1.1.0 DT_0.13
## [7] kableExtra_1.1.0 janitor_2.0.1 remotes_2.1.1
## [10] projections_0.4.1 earlyR_0.0.1 epitrix_0.2.2
## [13] distcrete_1.0.3 incidence_1.7.1 rio_0.5.16
## [16] reshape2_1.4.4 rvest_0.3.5 xml2_1.3.2
## [19] linelist_0.0.40.9000 forcats_0.5.0 stringr_1.4.0
## [22] dplyr_0.8.5 purrr_0.3.4 readr_1.3.1
## [25] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.0
## [28] tidyverse_1.3.0 here_0.1 reportfactory_0.0.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 selectr_0.4-2 ggsignif_0.6.0 ellipsis_0.3.1
## [5] rprojroot_1.3-2 snakecase_0.11.0 fs_1.4.1 rstudioapi_0.11
## [9] farver_2.0.3 fansi_0.4.1 splines_3.6.3 knitr_1.28
## [13] jsonlite_1.6.1 broom_0.5.6 dbplyr_1.4.3 compiler_3.6.3
## [17] httr_1.4.1 backports_1.1.7 assertthat_0.2.1 Matrix_1.2-18
## [21] cli_2.0.2 htmltools_0.4.0 prettyunits_1.1.1 tools_3.6.3
## [25] gtable_0.3.0 glue_1.4.1 Rcpp_1.0.4.6 carData_3.0-3
## [29] cellranger_1.1.0 vctrs_0.3.0 nlme_3.1-144 matchmaker_0.1.1
## [33] crosstalk_1.1.0.1 xfun_0.14 ps_1.3.3 openxlsx_4.1.5
## [37] lifecycle_0.2.0 rstatix_0.5.0 MASS_7.3-51.5 scales_1.1.1
## [41] hms_0.5.3 sodium_1.1 yaml_2.2.1 curl_4.3
## [45] gridExtra_2.3 stringi_1.4.6 kyotil_2019.11-22 boot_1.3-24
## [49] pkgbuild_1.0.8 zip_2.0.4 rlang_0.4.6 pkgconfig_2.0.3
## [53] evaluate_0.14 lattice_0.20-38 labeling_0.3 htmlwidgets_1.5.1
## [57] cowplot_1.0.0 processx_3.4.2 tidyselect_1.1.0 plyr_1.8.6
## [61] magrittr_1.5 R6_2.4.1 generics_0.0.2 DBI_1.1.0
## [65] pillar_1.4.4 haven_2.2.0 foreign_0.8-75 withr_2.2.0
## [69] mgcv_1.8-31 survival_3.1-8 abind_1.4-5 modelr_0.1.8
## [73] crayon_1.3.4 car_3.0-7 utf8_1.1.4 rmarkdown_2.1
## [77] viridis_0.5.1 grid_3.6.3 readxl_1.3.1 data.table_1.12.8
## [81] callr_3.4.3 reprex_0.3.0 digest_0.6.25 webshot_0.5.2
## [85] munsell_0.5.0 viridisLite_0.3.0